// This file describes a pre-aggregated event format for further processing and
// displaying of trace data.
//
// A trace can contain events from several devices. Each device has
// several resources. These can be threads on a CPU or specific computation
// units on hardware. Within each resource, many trace events occur over time.
// Most resources can only execute one thing at a time and so trace events don't
// overlap in those.
//
// Use cases:
// ==========
// Traces in this format can be consumed by timeline views (e.g. the Chrome
// trace viewer).
//
// The goal is to have this data be independent of a specific hardware type and
// be able to represent traces of arbitrary combinations of CPU, GPU, TPU and
// whatever else might come up.
//
// Data format:
// ============
// The messages Trace>Device>Resource form the metadata necessary to understand
// this trace. TraceEvent objects themselves are not nested within these
// structures for two reasons:
//
// - Efficient on-disk representation: A trace can become huge and have millions
//   of trace events. If the events were nested inside the other data
//   structures, a single large proto would have to be stored. Having the traces
//   outside of those structures means that efficient and shardable storage
//   formats can be used.
//
// - Streaming use cases: Some consumers might not be interested in a
//   per-device-and-resource-structured representation. They might just want to
//   churn through all of the trace events to get aggregate representations. In
//   such cases, it's much more effective to churn through the events one by one
//   rather than creating the huge memory structure.
//
// The downside is obviously that two additional integers need to be stored per
// trace event to identify the device and resource it occurred in.
//
// Timestamps:
// ===========
// Events may be as short as one processor cycle - on the order of 1 ns.
// Timestamps are therefore 64-bit picosecond counters, with 0 being the
// start of the trace, and overflow occurring after ~200 days.

syntax = "proto2";

package tensorflow.profiler;

import "tensorflow/core/profiler/protobuf/task.proto";

option cc_enable_arenas = true;

// Trace-level metadata shared by the individual traces of a system. The trace
// events themselves are stored outside of this message.
message Trace {
  // Reserved field numbers; they must not be used by new fields.
  reserved 2, 3;

  // Devices this trace has information about, keyed by device_id. Each value
  // carries more data about that specific device.
  map<uint32, Device> devices = 1;

  // Tasks that were traced, keyed by a unique ID for the server on which the
  // task ran.
  map<uint32, tensorflow.profiler.Task> tasks = 6;

  // Time range covered by this trace. Both bounds are expressed in
  // picoseconds since tracing started.
  //
  // Start of the first event.
  optional uint64 min_timestamp_ps = 4;
  // End of the last event.
  optional uint64 max_timestamp_ps = 5;

  // NOTE(review): presumably the total number of events in this trace --
  // confirm against the writer.
  optional uint64 num_events = 7;

  // String intern table for event names and TraceMe arguments. Entries are
  // referenced from TraceEvent.name_ref.
  map<fixed64, string> name_table = 8;
}

// A 'device' is a physical entity in the system. It is made up of several
// resources.
message Device {
  // Reserved field number; it must not be used by new fields.
  reserved 4;

  // Name of the device.
  optional string name = 1;

  // Id of this device. Unique within a single trace.
  optional uint32 device_id = 2;

  // Resources on this device, keyed by resource_id.
  map<uint32, Resource> resources = 3;
}

// A 'resource' is generally a specific computation component on a device. These
// can range from threads on CPUs to specific arithmetic units on hardware
// devices.
message Resource {
  // The name of the resource.
  optional string name = 1;

  // The id of the resource. Unique within a device, but not necessarily within
  // the whole trace. Device.resources is keyed by this id and
  // TraceEvent.resource_id refers to it.
  optional uint32 resource_id = 2;

  // Number of events added to this resource.
  // NOTE(review): this counter is 32-bit while Trace.num_events is 64-bit --
  // confirm that a single resource can never exceed the uint32 range.
  optional uint32 num_events = 3;
}

/* TraceEvent can represent four kinds of different events specified by
 * go/trace-event-format
 * 1. Complete/Duration Event
 * 2. Flow Event
 * 3. Counter Event
 * 4. Async Event
 *
 * =======================================================================
 *                         |  has_flow_id = true   |   has_flow_id = false
 * =======================================================================
 * has_resource_id = true  |      flow event       |     complete event
 * =======================================================================
 * has_resource_id = false |      async event      |     counter event
 * =======================================================================
 *
 *  For async events:
 *  If flow_entry_type == FLOW_MID, a pair of JSON events is generated for a
 *  single async event (one for begin and one for end). If flow_entry_type is
 *  FLOW_START or FLOW_END, a single JSON event is generated correspondingly.
 *  flow_category is mandatory for async events, even if it is "".
 */

// A single trace event. The kind of event (complete, flow, counter or async)
// follows from whether resource_id and flow_id are set.
message TraceEvent {
  // Reserved field number; it must not be used by new fields.
  reserved 4;

  // Position of an event within a flow.
  // Events sharing a flow_id are linked by arrows in trace_viewer. An arrow is
  // only shown when at least FLOW_START and FLOW_END are present; zero or more
  // FLOW_MID events may belong to the same flow. Arrows run from FLOW_START to
  // FLOW_END, passing through each FLOW_MID event in timestamp order.
  enum FlowEntryType {
    FLOW_NONE = 0;
    FLOW_START = 1;
    FLOW_MID = 2;
    FLOW_END = 3;
  }

  // Id of the device on which this event occurred. The full dataset should
  // contain this device in the Trace object.
  optional uint32 device_id = 1;

  // Id of the resource on which this event occurred. The full dataset should
  // contain this resource in the Device object of the Trace object. A
  // resource_id is unique on a specific device, but not necessarily within
  // the trace.
  // NOTE: counter events leave this field unset because they are per device.
  optional uint32 resource_id = 2;

  // The event name, stored either inline or as a reference.
  oneof name_oneof {
    // Name of this trace event, stored inline.
    string name = 3;
    // Reference to the name in Trace's name_table (e.g. in SStable format).
    fixed64 name_ref = 12;
  }

  // Id of the group this event belongs to. Lets the trace viewer show only a
  // particular group of trace events. Defaults to -1.
  optional int64 group_id = 5 [default = -1];

  // Time at which this event occurred, in picoseconds since tracing started.
  // The value lies in the range [min_timestamp_ps, max_timestamp_ps] of the
  // Trace.
  optional uint64 timestamp_ps = 6;

  // Duration of the event in picoseconds, if applicable.
  // Events without a duration are called instant events.
  optional uint64 duration_ps = 7;

  // Additional details, e.g. the raw data that led to this TraceEvent. Kept
  // as raw bytes so that the deserialization cost (memory and runtime) is
  // only paid when the data is actually used.
  // See RawData in trace_events_raw.proto.
  optional bytes raw_data = 8;

  // Correlates the multiple events that make up one flow.
  optional uint64 flow_id = 9;

  // Position of this event within its flow; see FlowEntryType.
  optional FlowEntryType flow_entry_type = 10;

  // NOTE(review): category of the flow. The description preceding this
  // message states that it is mandatory for async events -- confirm how the
  // category is encoded in this integer.
  optional uint32 flow_category = 11;

  // The streaming trace viewer frontend needs a unique id for each event in
  // order to deduplicate events, while keeping the entropy overhead of that
  // id low. The tuple <device_id, timestamp_ps, serial> is therefore used as
  // the unique id; serial is optional and only required when the timestamp is
  // not unique.
  optional uint32 serial = 13;
}
